#!/usr/bin/python3
# -*- coding:utf-8 -*-
# project:
# user:86175
# Author: 亿只羊
# createtime: 2022/4/29 9:56
import jieba
import numpy as np
import pandas as pd

# Weather-related sentences form the "positive" corpus; all other
# categories form the "negative" corpus. Each file holds one sentence
# per line (no header row).
pos = pd.read_csv('./data/weather_pos.txt', encoding='UTF-8', header=None)
neg = pd.read_csv('./data/weather_neg.txt', encoding='UTF-8', header=None)

# Segment every sentence into a list of tokens with jieba.
pos['words'] = pos[0].apply(jieba.lcut)
neg['words'] = neg[0].apply(jieba.lcut)

# Stack both corpora; label positives 1 and negatives 0.
x = np.concatenate([pos['words'], neg['words']])
y = np.concatenate([np.ones(len(pos)), np.zeros(len(neg))])
# Train word vectors on the tokenized corpus.
# NOTE: gensim 4.0 renamed ``size`` -> ``vector_size`` and ``iter`` -> ``epochs``;
# the old keyword names used here previously raise TypeError on any modern
# gensim install, so the current names are used below.
from gensim.models.word2vec import Word2Vec

word2vec = Word2Vec(
    x,                  # iterable of token lists built above
    vector_size=300,    # embedding dimensionality
    window=3,           # context window size
    min_count=5,        # drop words seen fewer than 5 times
    sg=1,               # skip-gram (not CBOW)
    hs=1,               # hierarchical softmax (not negative sampling)
    epochs=10,          # passes over the corpus
    workers=25,         # parallel worker threads
)
word2vec.save('./data/word2vec.model')

# NOTE(review): vestigial entry-point guard — all the work above executes at
# import time, so this guard protects nothing; consider wrapping the script
# body in a main() function called from here.
if __name__ == "__main__":
    pass